#####
# Replication for Analysis I in:
# "How partisan affect shapes citizens' perception of the political world"
# Electoral Studies 60 (2019) 102045
# Dalston G. Ward and Margit Tavits
#####

# Paper models run under R 3.4.3; Replication file written under  R 3.6.1

library(data.table) # v1.12.2
library(lme4) # v1.1-21
library(lfe) # v.2.8-3
library(texreg) # v1.36.23; Note that for lmer() models, texreg() reports residual and group variances, while the text and SI for this paper report residual and group standard deviations.
library(merTools) # v0.5.0
library(xtable) # v1.8-4
library(MASS) # v7.3-51.4

setwd("Set as appropriate")

ip_dat <- fread("ip_dat.csv", colClasses = c("C1005" = "character"))

#####
# Table 1, Main Text (p.6)
#####

# Note: Chile 2009 (only two knowledge questions), Slovenia 2008 (knowledge missing), Turkey 2011 (knowledge missing), and Uruguay 2009 (knowledge missing) are dropped from all models inlcuding the variable "knowledge". 
affect_mod_PE <- lmer(extreme_place ~ (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party +  age + female + education + knowledge + Party.Like.Dislike + I(Party.Like.Dislike^2) + RPV + PPV + NPV + mcp1 , ip_dat)
summary(affect_mod_PE)
# There is a typo in Table 1 of the main text: sample size should be 167,440; not 167,770.

affect_mod <- lmer(extreme_place ~(1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party +  age + female + education + knowledge + Party.Like.Dislike + I(Party.Like.Dislike^2), ip_dat)
summary(affect_mod)

texreg(
  list(
    affect_mod,
    affect_mod_PE
    ),
  booktabs = T,
  dcolumn = T,
  caption = "Predictors of Perceived Extremism",
  caption.above = T,
  label = "tab:MainResults",
  custom.coef.map = list(
    "Party.Like.Dislike" = "Party Affect",
    "I(Party.Like.Dislike^2)" = "Party Affect$^2$",
    "extreme_self" = "Respondent Extremism",
    "extreme_party" = "Party Extremism",
    "age" = "Age",
    "femaleTRUE" = "Female",
    "education" = "Education",
    "knowledge" = "Political Knowledge",
    "RPVTRUE" = "Reported Party Voter",
    "PPVTRUE" = "Potential Party Voter",
    "NPVTRUE" = "Never Party Voter",
    "mcp1TRUE" = "Most Competent Party",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3
)

#####
# Figure 1, Main Text (p.6)
#####

opar <- par(mar = c(5,5,1,2))

set.seed(8092)

plot_dat <- data.table(affect_mod@frame)

plot( x = plot_dat[ , Party.Like.Dislike], y = plot_dat[ ,extreme_place], ylab = "Expected Perceived Extremism", ylim = c(2,3.5), xlim = c(0,10), xlab = "Party Affect", type = "n")

pred_data <- 
  data.frame(
    C1005 = "036020070000000101",
    PartyElec_id = "AUS_2007_D",
    C1004 = "AUS_2007",
    extreme_party = plot_dat[ ,mean(extreme_party, na.rm = T)],
    extreme_self = plot_dat[ ,mean(extreme_self, na.rm = T)],
    Party.Like.Dislike = 0:10,
    age = plot_dat[ ,mean(age, na.rm = T)],
    female = TRUE,
    education = plot_dat[ ,mean(education, na.rm = T)],
    knowledge = plot_dat[ , mean(knowledge, na.rm = T)]
    )

#Note: This line of code takes approx. 1-2 minutes to run on a 2017 Macbook Pro.
affect_preds <- merTools::predictInterval(merMod = affect_mod, newdata = pred_data, which = "fixed", level = 0.99, n.sims = 1000, stat = "median", include.resid.var = FALSE)

lines( x = 0:10, y = affect_preds$fit, lwd = 2 )
polygon(x = c(0:10,10:0) , y = c(affect_preds$upr, affect_preds[11:1, "lwr"] ), col = "#66666650", border = NA)

# Note: In the paper we say that we used the median value of gender for these predictions. As can be seen in Table SI.1.1, the median is Female = 0, while this analysis uses Female = 1. This is an oversight on our part. Here we show that changing this to what is reported in the main text does not alter the substantive conclusions.

plot( x = plot_dat[ , Party.Like.Dislike], y = plot_dat[ ,extreme_place], ylab = "Expected Perceived Extremism", ylim = c(2,3.5), xlim = c(0,10), xlab = "Party Affect", type = "n")

pred_data2 <- 
  data.frame(
    C1005 = "036020070000000101",
    PartyElec_id = "AUS_2007_D",
    C1004 = "AUS_2007",
    extreme_party = plot_dat[ ,mean(extreme_party, na.rm = T)],
    extreme_self = plot_dat[ ,mean(extreme_self, na.rm = T)],
    Party.Like.Dislike = 0:10,
    age = plot_dat[ ,mean(age, na.rm = T)],
    female = FALSE,
    education = plot_dat[ ,mean(education, na.rm = T)],
    knowledge = plot_dat[ , mean(knowledge, na.rm = T)]
  )

#Note: This line of code takes approx. 1-2 minutes to run on a 2017 Macbook Pro.
affect_preds2 <- merTools::predictInterval(merMod = affect_mod, newdata = pred_data2, which = "fixed", level = 0.99, n.sims = 1000, stat = "median", include.resid.var = FALSE)

lines( x = 0:10, y = affect_preds2$fit, lwd = 2 )
polygon(x = c(0:10,10:0) , y = c(affect_preds2$upr, affect_preds2[11:1, "lwr"] ), col = "#66666650", border = NA)

par(opar)

#####
# Expected values, Main Text (p. 5)
#####

data.frame("Affect" = 0:10, "Expectations_Women" = affect_preds$fit)

# ... and the same when Female = 0 instead of Female = 1:
data.frame("Affect" = 0:10, "Expectations_Men" = affect_preds2$fit)

#####
# Descriptions of Sample Size, Main Text (pp. 1, 2, 4-5)
#####

# N countries (pp. 1-2, 4)
ip_dat[ , uniqueN(CSES_country)] 
# Returns 38, which includes Chile, Slovenia, Turkey and Uruguay,
# which are included in the data but, due to missing knowledge items,
# are excluded from the main models. Removing these leaves 34, as 
# reported in the main text. (For more on these countries exclusions,
# see above and SI p. 4)

# N elections (pp. 1, 4)
ip_dat[ , uniqueN(C1004)] 
# again includes the 4 elections described above.
# After their remove, gives 43 as reported in the text.

# Included years (p. 4)
ip_dat[ , summary(as.numeric(substr(C1004, 5, 8)))]

# N parties (p.4)
ip_dat[ !CSES_country %in% c("CHL", "SLO", "TUR", "URY"), uniqueN(unique_party)]
# Excludes the four elections not included in the data. 
# Also uses a party identifier that is constant over elections
# for countries with two elections in the CSES 
# (note, the main analysis uses a party id measure that 
# has separate values for a given party in separate elections).

# To more exactly reproduce the 230 reported in the text,
# need to drop observations not included in the main models
# and use the time constant party identifier, as done below:
ip_dat[ !CSES_country %in% c("CHL", "SLO", "TUR", "URY") & !is.na(extreme_place) & !is.na(extreme_self) & !is.na(extreme_party) & !is.na(age) & !is.na(female) & !is.na(education) & !is.na(knowledge) & !is.na(Party.Like.Dislike), uniqueN(unique_party)] 

# N observations (p.4)
# We produced this value based on only those individuals
# included in the main models. See also the regression output
# for the models in Table 1, shown above.
# (Total N for respondent-party dyads with measurement on 
# the outcome (perceived extremism) and the predictor (party affect)
# is 370,767; see Table SI.1.3 and code below)
ip_dat[ !CSES_country %in% c("CHL", "SLO", "TUR", "URY") & !is.na(extreme_place) & !is.na(extreme_self) & !is.na(extreme_party) & !is.na(age) & !is.na(female) & !is.na(education) & !is.na(knowledge) & !is.na(Party.Like.Dislike) , .N]

#####
# Table SI.1.1 Descriptive Statistics (SI p.1)
#####

custom_sum <- function(x, ...){
  c("Min" = min(x, ...), "Median" = median(x, ...), "Mean" = mean(x, ...), "Max" =  max(x, ...), "SD" = sd(x, ...), "N" = sum(!is.na(x)))
}

ParResp_sum <- 
  t(apply(affect_mod@frame[,c(
    "Party.Like.Dislike",
    "extreme_place",
    "age",
    "female", 
    "education",
    "knowledge",
    "extreme_self",
    "extreme_party"
    )], 2, custom_sum, na.rm = T))

row.names(ParResp_sum) <- c("Party Affect", "Perceived Extremism", "Age", "Female", "Education", "Knowledge", "Respondent Extremism", "Party Extremism")
xtable(ParResp_sum, digits = 2)

PE_sum <- t(apply(affect_mod_PE@frame[,c("RPV", "PPV", "NPV", "mcp1")], 2, custom_sum, na.rm = T))

row.names(PE_sum) <- c("Reported Party Voter", "Potential Party Voter", "Never Party Voter", "Most Competent Party")

xtable(PE_sum, digits = 2)

#####
# Table SI.1.2 Ordered Logits (SI p.2) & predicted probabilities (SI p.3)
#####

#### takes a LONG time to run (10+ minutes on 2017 Macbook Pro)! #####
polr_affect_P <- polr(factor(extreme_place) ~  extreme_self + Party.Like.Dislike + I(Party.Like.Dislike^2) + age + female + education + knowledge + factor(PartyElec_id), ip_dat[], Hess = TRUE)
summary(polr_affect_P)

# This one is much faster.
polr_affect_E <- polr(factor(extreme_place) ~  factor(C1004) + extreme_self + extreme_party + Party.Like.Dislike + I(Party.Like.Dislike^2) + age + female + education + knowledge, ip_dat[], Hess = TRUE)
summary(polr_affect_E)

# Need to do some manual replacement of factor levels to make predict work
pred_lev <- list( "factor(C1004)" = c("AUS_2007", gsub("factor(C1004)", "", names(coefficients(polr_affect_E))[1:42], fixed = T)))
polr_affect_E$xlevels <- pred_lev

pred_data_polr <- 
  data.frame(
    C1004 = "AUS_2007",
    extreme_self = mean(polr_affect_E$model$extreme_self, na.rm = T),
    extreme_party = mean(polr_affect_E$model$extreme_party, na.rm = T),
    Party.Like.Dislike = c(2,5,8),
    Party.Like.Distlike_sq = c(2,5,8)^2,
    age = mean(polr_affect_E$model$age, na.rm = T),
    female = TRUE,
    education = mean(polr_affect_E$model$education, na.rm = T),
    knowledge = mean(polr_affect_E$model$knowledge, na.rm = T)
    )

# Predictions described on p.2, SI
predict(polr_affect_E, newdata = pred_data_polr, type = "probs")

texreg(
  list(
    polr_affect_E,
    polr_affect_P
    ),
  omit.coef = "(C1004)|(PartyElec)",
  include.aic = F,
  include.bic = F,
  include.dev = F,
  include.loglik = F,
  digits = 3,
  dcolumn = TRUE,
  booktabs = TRUE
  )

#####
# Table SI.1.3 Fixed-Effects Regressions (SI p.5)
#####

mod_like_LFE <- felm(extreme_place ~ extreme_party + Party.Like.Dislike + I(Party.Like.Dislike^2) |  C1005 | 0 | C1005  , data = ip_dat[])
summary(mod_like_LFE)

mod_like_LFE2 <- felm(extreme_place ~ Party.Like.Dislike + I(Party.Like.Dislike^2) | C1005 + PartyElec_id  | 0 | C1005, data = ip_dat[])
summary(mod_like_LFE2)

mod_like_LFE3 <- felm(extreme_place ~ Party.Like.Dislike + I(Party.Like.Dislike^2) | C1005 + PartyElec_id  | 0 | C1005 + PartyElec_id, data = ip_dat[])
summary(mod_like_LFE3)

texreg(
  list(
    mod_like_LFE,
    mod_like_LFE2,
    mod_like_LFE3
    ),
  dcolumn = T,
  booktabs = T
  )

#####
# Table SI.1.4 Robustness Tests (SI p.8)
#####

mod_CMP <- lmer(extreme_place ~  (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party_CMP1 + Party.Like.Dislike + I(Party.Like.Dislike^2) +  age + female + education + knowledge, ip_dat[])
summary(mod_CMP)

mod_CHES <- lmer(extreme_place ~  (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party_CHES1 + Party.Like.Dislike + I(Party.Like.Dislike^2) +  age + female + education + knowledge, ip_dat[])
summary(mod_CHES)

mod_fac <- lmer(extreme_place ~ (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party +  age + female + education + knowledge + PLD_0 + PLD_1 + PLD_2 + PLD_3 + PLD_4 + PLD_6 + PLD_7 + PLD_8 + PLD_9 + PLD_10, ip_dat[])
summary(mod_fac)

mod_abs <- lmer(extreme_place ~ (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party +  age + female + education + knowledge + PLD_abs, ip_dat[])
summary(mod_abs)

mod_advanced <- lmer(extreme_place ~  (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party + Party.Like.Dislike + I(Party.Like.Dislike^2) +  age + female + education + knowledge + advanced, ip_dat[])
summary(mod_advanced)

mod_enep <- lmer(extreme_place ~  (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party + Party.Like.Dislike + I(Party.Like.Dislike^2) +  age + female + education + knowledge + enep_CSES, ip_dat[])
summary(mod_enep)

# Note that this model excludes three elections without
# lower-house vote shares: Japan (2007; only upper house 
# available); Romania (2009; only presidential available);
# Taiwan (2008; only presidential available). It also 
# excludes some smaller parties (or parties that were part 
# of coalitions) that do not have their lower house vote
# share recorded in the CSES data.
mod_psize <- lmer(extreme_place ~  (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party + Party.Like.Dislike + I(Party.Like.Dislike^2) +  age + female + education + knowledge + votes_LH, ip_dat[])
summary(mod_psize)

texreg(
  list(
    affect_mod,
    mod_CMP,
    mod_CHES,
    mod_fac,
    mod_abs,
    mod_advanced,
    mod_enep,
    mod_psize
    ),
  dcolumn = T,
  booktabs = T,
  label = NULL,
  caption = "\\textbf{Table SI.1.4:} Robustness Tests for Moels of \\textit{Perceived Extremism}",
  digits = 3,
  caption.above = T
  )

#####
# Table SI.1.5 Directional Effects (SI p.11)
#####

direc_mod <- lmer(extreme_by_side ~ (1|C1005) + (1|PartyElec_id) + (1|C1004) + Party.Like.Dislike + extreme_self + extreme_party_by_side  +  age + female + education + knowledge, ip_dat[])
summary(direc_mod)

texreg(direc_mod,
       custom.coef.names = c(
         "Intercept",
         "Party Affect",
         "Respondent Extremism",
         "Directional Party Extremism",
         "Age",
         "Female",
         "Education",
         "Knowledge"
         ),
       reorder.coef = c(2:8, 1),
       digits = 3,
       dcolumn = T,
       booktabs = T,
       label = NULL,
       caption = "\\textbf{Table SI.1.2:} Party Affect and Directional Perceived Extremism",
       caption.above = T
       )

#####
# Table SI.1.6 Projection Effects (SI p.13)
#####

affect_mod_PES <- lmer(extreme_place ~ (1|C1005) + (1|PartyElec_id) + (1|C1004) + extreme_self + extreme_party +  age + female + education + knowledge + Party.Like.Dislike + I(Party.Like.Dislike^2), ip_dat[RPV == F & PPV == F & NPV == F])
summary(affect_mod_PES)

texreg(
  list(affect_mod_PES),
  caption.above = T,
  caption = "Predictors of Extreme Perceptions for Dyads with All Vote-Choice Measures Equal to Zero",
  label = "tab:subset",
  booktabs = T,
  dcolumn = T,
  custom.coef.map = list(
    "Party.Like.Dislike" = "Party Affect",
    "I(Party.Like.Dislike^2)" = "Party Affect$^2$",
    "extreme_self" = "Respondent Extremism",
    "extreme_party" = "Party Extremism",
    "age" = "Age",
    "femaleTRUE" = "Female",
    "education" = "Education",
    "knowledge" = "Political Knowledge",
    "RPVTRUE" = "Reported Party Voter",
    "PPVTRUE" = "Potential Party Voter",
    "NPVTRUE" = "Never Party Voter",
    "mcp1TRUE" = "Most Competent Party",
    "(Intercept)" = "Intercept"
  ),
  include.aic = F,
  include.bic = F,
  include.loglik = F,
  digits = 3
)
